# To run this code, first set your StatsBomb API credentials in the environment variables SB_USER and SB_PASS. Alternatively, adapt the data-loading calls below to use the StatsBomb free (open) data.
# Download the match list for the configured competition and season.
# Credentials are read from the SB_USER / SB_PASS environment variables.
matches <- StatsBombR::get.matches(
  Sys.getenv('SB_USER'),
  Sys.getenv('SB_PASS'),
  season_id = params$season_id,
  competition_id = params$comp_id
)

# Display name of the selected team, used in the chart titles. Take it from
# the first home fixture and fall back to the first away fixture, so the name
# is still resolved when the team has no home match in `matches`.
team_name <- matches$home_team.home_team_name[
  match(params$team_id, matches$home_team.home_team_id)
]
if (length(team_name) != 1 || is.na(team_name)) {
  team_name <- matches$away_team.away_team_name[
    match(params$team_id, matches$away_team.away_team_id)
  ]
}
# Ids of the selected team's most recent `params$n_games` matches (home or
# away), newest first.
match_ids <- matches |>
  dplyr::filter(
    home_team.home_team_id == params$team_id |
      away_team.away_team_id == params$team_id
  ) |>
  dplyr::mutate(match_date = as.Date(match_date)) |>
  dplyr::arrange(dplyr::desc(match_date)) |>
  # slice_head() returns no rows for n_games = 0, whereas slice(1:0) expands to
  # slice(c(1, 0)) and silently keeps the first row
  dplyr::slice_head(n = params$n_games) |>
  dplyr::pull(match_id)
# Fetch the events of every selected match (credentials come from the
# SB_USER / SB_PASS environment variables) and pass them through allclean().
match_events <- StatsBombR::allclean(
  StatsBombR::allevents(
    Sys.getenv('SB_USER'),
    Sys.getenv('SB_PASS'),
    matches = match_ids
  )
)
## Time difference of 5.748711 secs
# Goal kicks taken by the selected team, reduced to start/end coordinates and
# a completion flag for plotting.
goalkicks <- match_events |>
  dplyr::filter(pass.type.name == 'Goal Kick', team.id == params$team_id) |>
  dplyr::mutate(
    # StatsBomb only records an outcome for passes that were NOT completed, so
    # a missing outcome id marks a successful pass (the same convention as the
    # `complete_passes` count in the under-pressure section of this script)
    success = is.na(pass.outcome.id)
  ) |>
  dplyr::select(
    x = location.x,
    y = location.y,
    end.x = pass.end_location.x,
    end.y = pass.end_location.y,
    success
  )
# Draw the goal-kick chart, titled with the team name and the match window.
create_goalkick_chart(goalkicks, colour = params$colour) +
  ggplot2::labs(
    title = paste0(team_name, ' Goalkick Zones'),
    subtitle = paste0('Last ', params$n_games, ' Games')
  )
# Clustered passes of the selected team from possessions whose first pass
# started at x <= 40 (the first third), numbered in order within each
# possession. Nothing here depends on the loop variable, so it is computed
# once instead of on every iteration.
# NOTE(review): action_number counts every pass in the possession, whichever
# team played it, before the team filter is applied - confirm that is intended.
first_third_passes <- match_events |>
  dplyr::filter(type.name == 'Pass') |>
  dplyr::group_by(match_id, possession) |>
  dplyr::mutate(action_number = dplyr::row_number()) |>
  # keep whole possessions, decided by where their first pass started
  dplyr::filter(location.x[1] <= 40) |>
  dplyr::ungroup() |>
  dplyr::filter(
    team.id == params$team_id,
    !is.na(pass.pass_cluster_id)
  )

# One cluster chart each for the 1st, 2nd and 3rd pass of those possessions.
for (pass.num in seq_len(3)) {
  passes <- first_third_passes |>
    dplyr::filter(action_number == pass.num) |>
    dplyr::select(
      x = location.x,
      y = location.y,
      end.x = pass.end_location.x,
      end.y = pass.end_location.y,
      cluster = pass.pass_cluster_id
    )
  plot <- create_pass_clusters_chart(passes, colour = params$colour) +
    ggplot2::ggtitle(
      label = paste0(
        team_name,
        ' First Third ',
        toOrdinal::toOrdinal(pass.num),
        ' Passes'
      ),
      subtitle = paste0('Last ', params$n_games, ' Games')
    )
  # explicit print(): plots are not auto-printed inside a for loop
  print(plot)
}
# Clustered passes by the selected team from players whose position id is one
# of 9, 10, 11, 13, 14 or 15, keeping the passer's name, the start/end
# coordinates and the cluster id.
# NOTE(review): these ids presumably denote central/defensive midfield roles -
# confirm against the StatsBomb position list.
passes <- dplyr::select(
  dplyr::filter(
    match_events,
    type.name == 'Pass' &
      team.id == params$team_id &
      !is.na(pass.pass_cluster_id) &
      position.id %in% c(9, 10, 11, 13, 14, 15)
  ),
  player.name,
  x = location.x,
  y = location.y,
  end.x = pass.end_location.x,
  end.y = pass.end_location.y,
  cluster = pass.pass_cluster_id
)
# Print one pass-cluster chart per player found in `passes`.
# n.clust = 5 is forwarded to the chart helper (presumably the number of
# clusters to draw - confirm in create_pass_clusters_chart()).
for (player in unique(passes$player.name)) {
  plot <- create_pass_clusters_chart(
    dplyr::filter(passes, player.name == player),
    n.clust = 5,
    colour = params$colour
  ) +
    ggplot2::labs(
      title = paste0(player, ' Most Common Passes'),
      subtitle = paste0('Last ', params$n_games, ' Games')
    )
  print(plot)
}
# Build "reception sequences": for every ball receipt, the receipt row plus the
# passes found among the first three rows of its sequence. The result is left
# grouped by (match_id, reception_team.id, reception_number); the first_passes
# and receptions steps below rely on that grouping when they call slice(1).
# NOTE(review): assumes match_events is in chronological order within each
# match - confirm.
combinations <- match_events |>
dplyr::group_by(match_id) |>
dplyr::mutate(
# tag each ball receipt with the receiving team's id; all other rows get NA
reception_team.id = dplyr::if_else(type.name == 'Ball Receipt*', team.id, NA)
) |>
# carry the most recent receipt's team id down onto the following rows
# (fill() works within each match because the data is grouped by match_id)
tidyr::fill(reception_team.id, .direction = 'down') |>
dplyr::mutate(
# rows still NA after the fill (no earlier receipt in the match) get the
# placeholder team id 0
reception_team.id = dplyr::if_else(is.na(reception_team.id), 0, reception_team.id)
) |>
dplyr::group_by(match_id, reception_team.id) |>
# running count of receipts per match and reception team; it acts as the id of
# the sequence each row belongs to
dplyr::mutate(reception_number = cumsum(type.name == 'Ball Receipt*')) |>
# drop rows that precede any receipt in their group (e.g. placeholder team 0)
dplyr::filter(reception_number > 0) |>
dplyr::group_by(match_id, reception_team.id, reception_number) |>
# keep at most the first three rows of every sequence
dplyr::slice(1:3) |>
dplyr::filter(
# keep only sequences that contain a pass, and within them only the pass and
# ball-receipt rows
any(type.name == 'Pass'), type.name %in% c('Pass', 'Ball Receipt*')
)
# First pass of every reception sequence (combinations is still grouped per
# sequence, so slice(1) picks one row per group), keyed by sequence and passer.
first_passes <- dplyr::select(
  dplyr::ungroup(
    dplyr::slice(
      dplyr::filter(combinations, type.name == 'Pass'),
      1
    )
  ),
  match_id,
  reception_team.id,
  reception_number,
  following_pass.id = id,
  player.id
)
# One row per reception by the selected team that was followed by a pass from
# the same player, with the location/angle of the pass that was received and
# of the pass that followed.
receptions <- combinations |>
  # first row of every sequence (combinations is grouped per sequence)
  dplyr::slice(1) |>
  dplyr::select(
    match_id,
    reception_team.id,
    reception_number,
    location.x,
    location.y,
    team.id,
    player.id,
    player.name,
    related_events
  ) |>
  dplyr::mutate(
    # id of the first related event, used below as the id of the pass that was
    # received. vapply() guarantees a character vector and an empty
    # related_events entry yields NA instead of a subscript error.
    previous_pass.id = vapply(
      related_events,
      \(x) if (length(x) > 0) as.character(x[[1]]) else NA_character_,
      character(1)
    )
  ) |>
  dplyr::select(-related_events) |>
  # attach the following pass; joining on player.id as well keeps only passes
  # made by the player who received the ball
  dplyr::left_join(
    first_passes,
    by = c('match_id', 'reception_team.id', 'reception_number', 'player.id')
  ) |>
  dplyr::filter(!is.na(following_pass.id)) |>
  # details of the pass that was received
  dplyr::left_join(
    dplyr::select(
      match_events,
      previous_pass.id = id,
      previous_location.x = location.x,
      previous_location.y = location.y,
      previous_pass.angle = pass.angle
    ),
    by = 'previous_pass.id'
  ) |>
  # details of the pass that followed
  dplyr::left_join(
    dplyr::select(
      match_events,
      following_pass.id = id,
      following_location.x = location.x,
      following_location.y = location.y,
      following_pass.end_location.x = pass.end_location.x,
      following_pass.end_location.y = pass.end_location.y,
      following_pass.angle = pass.angle
    ),
    by = 'following_pass.id'
  ) |>
  # selected team only, and only receptions of passes whose angle lies within
  # +/- pi/2
  dplyr::filter(
    team.id == params$team_id,
    abs(previous_pass.angle) <= pi / 2
  ) |>
  dplyr::mutate(
    # NOTE(review): this maps "no change of direction" to -pi rather than 0;
    # the usual wrap to [-pi, pi) is (d + pi) %% (2 * pi) - pi. Left unchanged
    # because create_passing_sonars() may expect this offset - confirm.
    change_in_angle = (following_pass.angle - previous_pass.angle) %% (2 * pi) - pi
  )
# Passing sonar for the single player selected through params$player_id.
player <- dplyr::filter(receptions, player.id == params$player_id)
create_passing_sonars(player, colour = params$colour, nbins = 20)
# Passes played under pressure by the selected team.
under_pressure <- match_events |>
  dplyr::filter(
    under_pressure == TRUE,
    team.id == params$team_id,
    type.name == 'Pass'
  )

# Per-player completion versus expectation when passing under pressure,
# restricted to players with at least 20 such passes.
passing_under_pressure <- under_pressure |>
  dplyr::group_by(player.id, player.name) |>
  dplyr::summarise(
    total_passes = dplyr::n(),
    # sum of the modelled completion probabilities; passes with a missing
    # probability contribute nothing here but still count in total_passes
    expected_passes = sum(pass.pass_success_probability, na.rm = TRUE),
    # a pass with no recorded outcome is counted as completed
    complete_passes = sum(is.na(pass.outcome.id)),
    # return an ungrouped result instead of one still grouped by player.id
    .groups = 'drop'
  ) |>
  dplyr::filter(total_passes >= 20) |>
  dplyr::mutate(
    # NOTE(review): the factor 10 presumably expresses the value per 10 passes;
    # the axis label on the chart does not mention it - confirm.
    average_pass_over_expectation =
      10 * (complete_passes - expected_passes) / total_passes
  )
# Horizontal bar chart of passes completed above expectation under pressure,
# one bar per player, ordered by value and coloured on a red-grey-green scale
# centred on zero.
ggplot2::ggplot(passing_under_pressure) +
  # geom_col() is geom_bar(stat = 'identity')
  ggplot2::geom_col(
    ggplot2::aes(
      x = average_pass_over_expectation,
      y = reorder(player.name, average_pass_over_expectation),
      fill = average_pass_over_expectation
    ),
    colour = "#18191A",
    alpha = 0.9
  ) +
  ggplot2::scale_fill_gradient2(
    guide = NULL,
    midpoint = 0,
    low = "#b41313",
    mid = "#a1a1a1",
    high = "#007a3e"
  ) +
  ggplot2::xlab('Passes Completed Above Expected') +
  theme_tutorial() +
  # set the axis text/title elements on top of theme_tutorial(); no trailing
  # comma, since an empty trailing argument errors on ggplot2 versions that do
  # not ignore empty dots
  ggplot2::theme(
    axis.text.y = ggplot2::element_text(hjust = 1),
    axis.text.x = ggplot2::element_text(),
    axis.title.x = ggplot2::element_text()
  )
# Average modelled pass-completion probability and average pass length of the
# selected team in each game state, computed from the first element returned
# by get.gamestate().
gamestates <- StatsBombR::get.gamestate(match_events)[[1]] |>
  # restrict to the selected team up front; the per-group means are unaffected
  dplyr::filter(team.id == params$team_id) |>
  dplyr::group_by(GameState, team.id) |>
  dplyr::summarise(
    average_expected_passes = mean(
      pass.pass_success_probability,
      na.rm = TRUE
    ),
    average_pass_length = mean(pass.length, na.rm = TRUE)
  )
## Joining with `by = join_by(match_id)`
## Joining with `by = join_by(match_id)`
## Joining with `by = join_by(team.id, team.name, match_id)`
## Joining with `by = join_by(match_id, GameState)`
## Joining with `by = join_by(match_id, team.name)`
## Joining with `by = join_by(match_id, team.name, GameState)`
## `summarise()` has grouped output by 'GameState'. You can override using the `.groups` argument.
# Axis label -> column name for the two metrics shown on the game-state scatter.
metrics <- stats::setNames(
  c('average_expected_passes', 'average_pass_length'),
  c('Average Expected Pass', 'Average Pass Length')
)
create_gamestate_scatter(gamestates, metrics = metrics)